package data_deepprocessing.algorithm.cvalue_ncvalue_tfidf.util;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;

import data_deepprocessing.algorithm.cvalue_ncvalue_tfidf.bean.CValueBean;
import uk.ac.shef.dcs.oak.jate.JATEProperties;
import uk.ac.shef.dcs.oak.jate.core.context.ContextExtraction;
import uk.ac.shef.dcs.oak.jate.test.AlgorithmTester;
import uk.ac.shef.dcs.oak.jate.util.control.StopList;



/**
 * Context-word extraction step of the NC-Value pipeline: identifies the words
 * that co-occur with the top C-Value candidate terms and assigns each context
 * word a weight (number of distinct top terms it appears with, divided by the
 * total top-term count).
 */
public class ContextExtractionYYH extends ContextExtraction {

	static Logger logger = Logger.getLogger(ContextExtractionYYH.class.getName());

	/**
	 * Creates a context extractor wired through the JATE superclass.
	 *
	 * @param tester   supplies the candidate terms scored by C-Value
	 * @param stoplist stop-word list used when filtering context words
	 * @throws IOException if the superclass fails to initialise its resources
	 */
	public ContextExtractionYYH(AlgorithmTester tester, StopList stoplist) throws IOException {
		super(tester, stoplist, null);
	}

	/**
	 * Main entry point for this class: builds the context-word weight map
	 * from the supplied sentences.
	 *
	 * @param sentences sentences to scan for context words
	 * @return map from context word to its weight; empty when no context
	 *         words were found (never {@code null} after the fix in
	 *         {@link #Calculate_CW_Weight()})
	 * @throws IOException propagated from the underlying extraction utilities
	 */
	public Map<String, Double> Extract(List<String> sentences) throws IOException {
		/* Percentage of the C-Value terms to treat as 'top candidate terms'
		 * for context-word identification. Configurable in jate.properties;
		 * currently defaults to 100%, but could be exploited further later. */
		int percent_TopTerms = JATEProperties.getInstance().getPercentage();
		getTopTerms(percent_TopTerms);
		ContextIdentification(sentences);	// scan the data set
		Calculate_CW_Weight();
		return ContextWord_Map;
	}

	/**
	 * Collects every variant form of the candidate terms into
	 * {@code TopTerms_variants} and records the total count.
	 *
	 * NOTE(review): {@code percent_TopTerms} is accepted but never applied —
	 * every term returned by {@code NCValueTool.testCvalue()} is used
	 * regardless of the configured percentage. TODO: honour the parameter.
	 *
	 * @param percent_TopTerms currently unused (see note above)
	 * @return the accumulated set of top-term variants
	 */
	public Set<String> getTopTerms(int percent_TopTerms) {
		// Gather all surface variants of each candidate term.
		for (CValueBean bean : NCValueTool.testCvalue())
			TopTerms_variants.add(bean.getSeed_Content());
		logger.info("用到的term");
		for (String term : TopTerms_variants) {
			logger.info(term);
		}

		TopTerms_Count = TopTerms_variants.size();
		logger.info("TopTerms_Count :" + TopTerms_Count + "个");
		return TopTerms_variants;
	}

	/**
	 * Identifies context words sentence by sentence, using the 'top'
	 * candidate terms found earlier: for every sentence containing at least
	 * one top-term variant, extracts its context words and records which
	 * terms each context word co-occurs with.
	 *
	 * @param sentences sentences to scan
	 * @throws IOException propagated from the extraction utilities
	 */
	private void ContextIdentification(List<String> sentences) throws IOException {
		for (String sentence : sentences) {
			// Top-term variants actually present in this sentence.
			Set<String> sent_TopTermVariants = new HashSet<String>();
			sent_TopTermVariants.addAll(UtilityYYH.getTermVariants_sent(sentence, TopTerms_variants));
			if (!sent_TopTermVariants.isEmpty()) {
				Set<String> ContextWords = ExtractContextWords(sentence, sent_TopTermVariants);
				if (!ContextWords.isEmpty()) {
					CreateContext_To_TermMap(ContextWords, sent_TopTermVariants);
				}
			}
		}
		logger.info("context + term set ");
		for (Map.Entry<String, Set<String>> e : ContextWord_to_Term_Map.entrySet()) {
			logger.info(e.getKey() + " -> " + e.getValue());
		}
	}

	/**
	 * Computes the weight of each context word: the number of distinct top
	 * terms it appears with, divided by the total number of top terms.
	 *
	 * Fix: {@code ContextWord_Map} is now initialised (if still null) BEFORE
	 * the loop. The original lazily initialised it inside the loop, so when
	 * {@code ContextWord_to_Term_Map} was empty the map stayed {@code null},
	 * the logging loop below threw a NullPointerException, and
	 * {@link #Extract(List)} could return {@code null}.
	 */
	protected void Calculate_CW_Weight() {
		if (ContextWord_Map == null)
			ContextWord_Map = new HashMap<String, Double>();
		for (Map.Entry<String, Set<String>> e : ContextWord_to_Term_Map.entrySet()) {
			double weight = e.getValue().size() / (double) TopTerms_Count;
			ContextWord_Map.put(e.getKey(), weight);
		}
		logger.info("词的权重信息，这一块不是很明白");
		for (Map.Entry<String, Double> entry : ContextWord_Map.entrySet()) {
			logger.info(entry.getKey() + " -> " + entry.getValue());
		}
	}

	/**
	 * Returns the context words for a sentence.
	 *
	 * NOTE(review): this is hard-coded scaffolding — it only recognises six
	 * fixed Chinese sentences and returns a preset context-word set for each;
	 * any other sentence yields an empty set, and {@code sent_variants} is
	 * ignored. A commented-out POS-tagger-based implementation previously
	 * followed this method and has been removed as dead code (recover it from
	 * version control if needed).
	 *
	 * @param sent          the sentence to analyse
	 * @param sent_variants top-term variants present in the sentence (unused
	 *                      by this hard-coded version)
	 * @return preset context words for a known sentence, else an empty set
	 * @throws IOException declared for interface compatibility; never thrown
	 */
	public Set<String> ExtractContextWords(String sent, Set<String> sent_variants) throws IOException {
		Set<String> Sent_ContextWords = new HashSet<String>();
		String sentence1 = "间断口干乏力TUS加重伴头晕TUS";
		String sentence2 = "间断口干多饮TUS加重伴乏力TUS";
		String sentence3 = "口干口渴伴尿中泡沫TUS加重TUS";
		String sentence4 = "间断口干S多饮S乏力TUS加重伴头晕目眩TUS";
		String sentence5 = "间断口干多饮TUS加重伴手足麻木疼痛TUS";
		String sentence6 = "间断口干多饮TUS加重TUS";

		if (sent.equals(sentence1)) {
			Sent_ContextWords.add("间断");
			Sent_ContextWords.add("加重");
		} else if (sent.equals(sentence2)) {
			Sent_ContextWords.add("间断");
			Sent_ContextWords.add("加重");
		} else if (sent.equals(sentence3)) {
			Sent_ContextWords.add("口渴");
			Sent_ContextWords.add("泡沫");
			Sent_ContextWords.add("加重");
		} else if (sent.equals(sentence4)) {
			Sent_ContextWords.add("间断");
			Sent_ContextWords.add("加重");
			Sent_ContextWords.add("目眩");
		} else if (sent.equals(sentence5)) {
			Sent_ContextWords.add("间断");
			Sent_ContextWords.add("加重");
			Sent_ContextWords.add("疼痛");
		} else if (sent.equals(sentence6)) {
			Sent_ContextWords.add("间断");
			Sent_ContextWords.add("加重");
		}

		return Sent_ContextWords;
	}
}
